import copy
import json
import os

import pymongo
from pymongo import MongoClient
# Client for a MongoDB server on port 27017 (no host is given, so the
# driver's default host applies).
client = MongoClient(port=27017)
# Handle to the 'tiktok' database; the script writes one collection per
# hashtag into it.
db = client.tiktok
# Blank document template for a single video. The script fills in '_id',
# text_feature.text and text_feature.stickerText before inserting; every
# other field is stored with the empty placeholder defined here.
obj = dict(
    _id='',
    text_feature=dict(
        text='',
        stickerText=[],
    ),
    video_feature=dict(
        text_embed=[],
        img_embed={},
        audio=dict(yamnet=[]),
        editing={},
        label={},
        residual={},
    ),
    img_feature={},
)
# Hashtags are kept in batches. Previously both batches were assigned to
# `add` one after the other, so the first assignment was silently discarded;
# the batches are now named and the active one is selected explicitly.
HASHTAG_BATCHES = [
    [
        '2018vs2021', '90saesthetic', 'albumcover', 'amongus', 'aprilfools',
        'arabtiktok', 'athletesoftiktok', 'autismawareness', 'bakedoats',
        'baseballisback', 'beautyhack', 'beautyhacks', 'blackandproud',
        'careeradvice', 'carhacks', 'cartiktok', 'cleaningtiktok', 'colddays',
        'colorblast', 'competitivegaming', 'coversforlovers', 'crowdcheers',
        'dailyvlog', 'dayandnight', 'defrosting', 'dinnerparty', 'diyprojects',
        'doctorsoftiktok', 'dramaticmoments', 'earthday', 'easyrecipe',
        'ecohacks', 'emophase', 'fantheory', 'feelinggood', 'feliznavidad',
        'fetapasta', 'fitnesslife', 'foodie', 'foodontiktok',
    ],
    [
        'galentinesday', 'gamergoals', 'gamingtiktok', 'gettheretogether',
        'givethanksnotpranks', 'glasspainting', 'goodmorning',
        'GreenScreenScan', 'groupchat', 'guitartok', 'happyeaster',
        'happyholi',
    ],
]

# Hashtags processed by this run: the second batch only, which is the value
# `add` ended up with before. Change the index to run another batch.
add = list(HASHTAG_BATCHES[1])
# Folder holding the scraped metadata files; each non-blank line of a file is
# parsed as one JSON record.
TIKTOK_DIR = 'D:\\Work\\Tool\\tiktok\\TikToks\\'
# Root folder checked for a downloaded video at <hashtag>\<video id>.mp4.
VIDEO_DIR = 'E:\\Tiktok\\Hashtag\\'
# At most this many records are considered per metadata file.
MAX_RECORDS_PER_FILE = 2000

# Video id -> caption for every video handled so far in this run. It is shared
# across hashtags, so a video is only inserted under the first hashtag it is
# encountered with.
outdict = {}


def _sticker_texts(record):
    """Return the sticker strings found at the top level of *record*.

    'stickerTextList' takes precedence; 'stickersOnItem' is only consulted
    when the former key is absent.
    """
    texts = []
    if 'stickerTextList' in record:
        for item in record['stickerTextList']:
            value = item['stickerText']
            # Non-list values are ignored for this key, as before.
            if isinstance(value, list):
                texts.extend(value)
    elif 'stickersOnItem' in record:
        for item in record['stickersOnItem']:
            # The key must be looked up on the item itself; it used to be
            # looked up on the enclosing record, which skipped every item
            # unless the record happened to carry a top-level 'stickerText'.
            if 'stickerText' not in item:
                continue
            value = item['stickerText']
            if isinstance(value, list):
                texts.extend(value)
            else:
                texts.append(value)
    return texts


def _id_and_text(record):
    """Return ``(video_id, caption)`` for either supported record layout.

    Records carry the id/caption either as top-level 'id'/'desc' or nested as
    'itemInfos' -> 'id'/'text'. ``('', '')`` is returned for anything else.
    """
    if 'id' in record:
        return record['id'], record['desc']
    if 'itemInfos' in record:
        info = record['itemInfos']
        return info['id'], info['text']
    return '', ''


def _import_hashtag(hashtag, metadata_files):
    """Insert the downloaded videos of *hashtag* into ``db[hashtag]``.

    *metadata_files* is the list of file names inside TIKTOK_DIR. Returns
    ``(inserted, duplicates)`` where *duplicates* counts inserts rejected
    with DuplicateKeyError.
    """
    inserted = 0
    duplicates = 0
    for fname in metadata_files:
        # NOTE(review): this is a substring match, so e.g. 'beautyhack' also
        # selects files named for 'beautyhacks' -- confirm this is intended.
        if hashtag not in fname.lower():
            continue
        with open(TIKTOK_DIR + fname, 'r', encoding='utf-8',
                  newline='\n') as records:
            ranking = 0
            for line in records:
                # A blank line (e.g. a trailing newline) is not a record.
                if not line.strip():
                    continue
                record = json.loads(line)
                ranking += 1
                if ranking > MAX_RECORDS_PER_FILE:
                    break

                video_id, text = _id_and_text(record)
                # Skip records without an id and videos already handled.
                if not video_id or video_id in outdict:
                    continue
                video_path = VIDEO_DIR + hashtag + '\\' + video_id + '.mp4'
                if not os.path.exists(video_path):
                    continue

                outdict[video_id] = text
                # Work on a private copy so the shared template (and its
                # nested dicts) is never mutated between documents.
                document = copy.deepcopy(obj)
                document['_id'] = video_id
                document['text_feature']['text'] = text
                # NOTE(review): sticker fields are read from the top level of
                # the record for both layouts, as before -- confirm that
                # 'itemInfos'-style records do not nest them.
                document['text_feature']['stickerText'] = _sticker_texts(record)
                try:
                    db[hashtag].insert_one(document)
                except pymongo.errors.DuplicateKeyError:
                    duplicates += 1
                else:
                    inserted += 1
    return inserted, duplicates


# The directory listing does not depend on the hashtag, so read it once.
metadata_files = os.listdir(TIKTOK_DIR)

for hashtag in add:
    hashtag = hashtag.replace('#', '').lower()
    scss, fl = _import_hashtag(hashtag, metadata_files)
    print(hashtag, scss, fl)